#This file generates the distances between tract centroids and notary offices

################################
# 1. Loading data and packages #
################################

#This clears the working space from old variables.
rm(list=ls()) 

#This sets the working directory.
setwd("C:/PATH_TO_REPLICATION_FILES")

###########################################
# Layer of census boundaries (CensusUnits)#
###########################################
#Load the package which allows you to read-in data from ArcGIS
require(rgdal)
require(maptools)
require(raster)
require(plyr)
require(sp)
require(tibble)
require(spatialEco)
require(readxl)
require(haven)
require(foreign)
require(mgcv) #This is used to find unique addresses 
#Load the CensusUnits data (this is the dataset of the polygons of all Census units)
load("Data_Europe/Generated_data/CensusHub.RData", envir = .GlobalEnv)
rm(EU_8)
#Keep only observations in Belgium
CensusUnits<-CensusUnits[CensusUnits$Country=="BE",]
#Keep only relevant information
CensusUnits<-CensusUnits[,1:4]
#Generate variable reflecting the NIS code
CensusUnits$nis<-as.numeric(substr(CensusUnits$CENSUS_ID,7,11))
#Find the projections of the CensusUnits
crs_census<-crs(CensusUnits)
#Define the projections of the notaries
crs_notaries <- CRS("+proj=longlat +datum=WGS84")
CensusUnits <- spTransform(CensusUnits, crs_notaries) 
#Find Census Unit centroids
centroids_census <- getSpPPolygonsLabptSlots(CensusUnits)
CensusUnits$Lat<-centroids_census[,2]
CensusUnits$Lon<-centroids_census[,1]
rm(centroids_census)
#####################
# Geocoded notaries #
#####################
notaries <- read_dta("Data_Belgium/BE_notary_data.dta")
#We drop those who don't have a Latitude
notaries<-notaries[is.na(notaries$Lat_ext)==FALSE,]
summary(notaries$Lat_ext)
summary(CensusUnits$Lat)
summary(notaries$Lon_ext)
summary(CensusUnits$Lon)


###########################
# 2. CALCULATING DISTANCE #
###########################
earth.dist <- function (long1, lat1, long2, lat2)
{
  rad <- pi/180
  a1 <- lat1 * rad
  a2 <- long1 * rad
  b1 <- lat2 * rad
  b2 <- long2 * rad
  dlon <- b2 - a2
  dlat <- b1 - a1
  a <- (sin(dlat/2))^2 + cos(a1) * cos(b1) * (sin(dlon/2))^2
  c <- 2 * atan2(sqrt(a), sqrt(1 - a))
  R <- 6378.145
  d <- R * c
  return(d)
}
#dist_km[i,j] is the distance from notary i to municipality j
dist_km<-matrix(,nrow=nrow(notaries)*nrow(CensusUnits),ncol=3)
rows_of_matrix=nrow(notaries)*nrow(CensusUnits)
distances <- data.frame(market_ID=character(rows_of_matrix), 
                        nis=character(rows_of_matrix), 
                        market_Lat=character(rows_of_matrix), 
                 market_Lon=numeric(rows_of_matrix),
                 market_Area=numeric(rows_of_matrix),
                 firm_ID=numeric(rows_of_matrix),
                 firm_Lat=numeric(rows_of_matrix),
                 firm_Lon=numeric(rows_of_matrix),
                 distance=numeric(rows_of_matrix)) 

distances$market_ID<-rep(CensusUnits$CENSUS_ID,nrow(notaries))
distances$nis<-rep(CensusUnits$nis,nrow(notaries))
distances$market_Lat<-rep(CensusUnits$Lat,nrow(notaries))
distances$market_Lon<-rep(CensusUnits$Lon,nrow(notaries))
distances$market_Area<-rep(CensusUnits$Area_sqkm,nrow(notaries))
distances$firm_ID<-rep(notaries$off_id,each=nrow(CensusUnits))
distances$firm_Lat<-rep(notaries$Lat_ext,each=nrow(CensusUnits))
distances$firm_Lon<-rep(notaries$Lon_ext,each=nrow(CensusUnits))
distances$distance<-round(earth.dist(distances$firm_Lon,distances$firm_Lat,distances$market_Lon,distances$market_Lat),3)
distances<-distances[,c("firm_ID","market_ID","distance")]
write.dta(distances,"Data_Belgium/Generated_data/distance_notary_Census_centroid_reduced.dta")

